Because of a bug with reference counting against the target guest page
authorkaf24@firebug.cl.cam.ac.uk <kaf24@firebug.cl.cam.ac.uk>
Thu, 13 Oct 2005 16:58:01 +0000 (17:58 +0100)
committerkaf24@firebug.cl.cam.ac.uk <kaf24@firebug.cl.cam.ac.uk>
Thu, 13 Oct 2005 16:58:01 +0000 (17:58 +0100)
when searching the list for L1 shadow pages to write protect that page
(at shadow_promote(), which is called by alloc_shadow_page()), the code
was always scanning _all_ the entries in the hash list. The length of
the hash list can be >500 for L1 shadow pages, and for each page we
needed to check all the PTEs in the page.

The patch attached does the following things:
- Correct the reference count (for the target guest page) so that=20
  it can exit the loop when all the L1 shadow pages to modify are found.
  Even with this, we can search the entire list if the page is at=20
  the end.
- Try to avoid the search in the hash list, by having a
  back pointer (as a hint) to the shadow page pfn. For most cases,=20
  there is a single translation for the guest page in the shadow.
- Cleanups, remove the nested function fix_entry

With those, the kernel build performance, for example, was improved
approximately by 20%, 40% on 32-bit, 64-bit unmodified Linux guests,
respectively. Tested log-dirty mode for plain 32-bit as well.

Signed-off-by: Jun Nakajima <jun.nakajima@intel.com>
Signed-off-by: Asit Mallick <asit.k.mallick@intel.com>
xen/arch/x86/shadow.c
xen/arch/x86/shadow32.c
xen/arch/x86/shadow_public.c
xen/include/asm-x86/shadow.h

index 18e44cc9c04796839a8f72c3c9d9414e5a9ed47f..dfea7b4f43441f4e280810efa51d1cc47b64f201 100644 (file)
@@ -616,13 +616,6 @@ static void shadow_map_l1_into_current_l2(unsigned long va)
         pt_va = ((va >> L1_PAGETABLE_SHIFT) & ~(entries - 1)) << L1_PAGETABLE_SHIFT;
         spl1e = (l1_pgentry_t *) __shadow_get_l1e(v, pt_va, &tmp_sl1e);
 
-        /*
-        gpl1e = &(linear_pg_table[l1_linear_offset(va) &
-                              ~(L1_PAGETABLE_ENTRIES-1)]);
-
-        spl1e = &(shadow_linear_pg_table[l1_linear_offset(va) &
-                                     ~(L1_PAGETABLE_ENTRIES-1)]);*/
-
         for ( i = 0; i < GUEST_L1_PAGETABLE_ENTRIES; i++ )
         {
             l1pte_propagate_from_guest(d, gpl1e[i], &sl1e);
@@ -645,7 +638,8 @@ static void shadow_map_l1_into_current_l2(unsigned long va)
                 min = i;
             if ( likely(i > max) )
                 max = i;
-        }
+            set_guest_back_ptr(d, sl1e, sl1mfn, i);
+          }
 
         frame_table[sl1mfn].tlbflush_timestamp =
             SHADOW_ENCODE_MIN_MAX(min, max);
@@ -716,8 +710,8 @@ shadow_set_l1e(unsigned long va, l1_pgentry_t new_spte, int create_l1_shadow)
         }
     }
 
+    set_guest_back_ptr(d, new_spte, l2e_get_pfn(sl2e), l1_table_offset(va));
     __shadow_set_l1e(v, va, &new_spte);
-
     shadow_update_min_max(l2e_get_pfn(sl2e), l1_table_offset(va));
 }
 
@@ -1135,6 +1129,24 @@ decrease_writable_pte_prediction(struct domain *d, unsigned long gpfn, unsigned
     }
 }
 
+static int fix_entry(
+    struct domain *d, 
+    l1_pgentry_t *pt, u32 *found, int is_l1_shadow, u32 max_refs_to_find)
+{
+    l1_pgentry_t old = *pt;
+    l1_pgentry_t new = old;
+
+    l1e_remove_flags(new,_PAGE_RW);
+    if ( is_l1_shadow && !shadow_get_page_from_l1e(new, d) )
+        BUG();
+    (*found)++;
+    *pt = new;
+    if ( is_l1_shadow )
+        shadow_put_page_from_l1e(old, d);
+
+    return (*found == max_refs_to_find);
+}
+
 static u32 remove_all_write_access_in_ptpage(
     struct domain *d, unsigned long pt_pfn, unsigned long pt_mfn,
     unsigned long readonly_gpfn, unsigned long readonly_gmfn,
@@ -1156,49 +1168,28 @@ static u32 remove_all_write_access_in_ptpage(
 
     match = l1e_from_pfn(readonly_gmfn, flags);
 
-    // returns true if all refs have been found and fixed.
-    //
-    int fix_entry(int i)
-    {
-        l1_pgentry_t old = pt[i];
-        l1_pgentry_t new = old;
-
-        l1e_remove_flags(new,_PAGE_RW);
-        if ( is_l1_shadow && !shadow_get_page_from_l1e(new, d) )
-            BUG();
-        found++;
-        pt[i] = new;
-        if ( is_l1_shadow )
-            shadow_put_page_from_l1e(old, d);
-
-#if 0
-        printk("removed write access to pfn=%lx mfn=%lx in smfn=%lx entry %x "
-               "is_l1_shadow=%d\n",
-               readonly_gpfn, readonly_gmfn, pt_mfn, i, is_l1_shadow);
-#endif
-
-        return (found == max_refs_to_find);
-    }
+    if ( shadow_mode_external(d) ) {
+        i = (frame_table[readonly_gmfn].u.inuse.type_info & PGT_va_mask) 
+            >> PGT_va_shift;
 
-    i = readonly_gpfn & (GUEST_L1_PAGETABLE_ENTRIES - 1);
-    if ( !l1e_has_changed(pt[i], match, flags) && fix_entry(i) )
-    {
-        perfc_incrc(remove_write_fast_exit);
-        increase_writable_pte_prediction(d, readonly_gpfn, prediction);
-        unmap_domain_page(pt);
-        return found;
+        if ( (i >= 0 && i <= L1_PAGETABLE_ENTRIES) &&
+             !l1e_has_changed(pt[i], match, flags) && 
+             fix_entry(d, &pt[i], &found, is_l1_shadow, max_refs_to_find) &&
+             !prediction )
+            goto out;
     }
  
     for (i = 0; i < GUEST_L1_PAGETABLE_ENTRIES; i++)
     {
-        if ( unlikely(!l1e_has_changed(pt[i], match, flags)) && fix_entry(i) )
+        if ( unlikely(!l1e_has_changed(pt[i], match, flags)) && 
+             fix_entry(d, &pt[i], &found, is_l1_shadow, max_refs_to_find) )
             break;
     }
 
+out:
     unmap_domain_page(pt);
 
     return found;
-#undef MATCH_ENTRY
 }
 
 static int remove_all_write_access(
@@ -1206,8 +1197,8 @@ static int remove_all_write_access(
 {
     int i;
     struct shadow_status *a;
-    u32 found = 0, fixups, write_refs;
-    unsigned long prediction, predicted_gpfn, predicted_smfn;
+    u32 found = 0, write_refs;
+    unsigned long predicted_smfn;
 
     ASSERT(shadow_lock_is_acquired(d));
     ASSERT(VALID_MFN(readonly_gmfn));
@@ -1238,26 +1229,18 @@ static int remove_all_write_access(
         return 1;
     }
 
-    // Before searching all the L1 page tables, check the typical culprit first
-    //
-    if ( (prediction = predict_writable_pte_page(d, readonly_gpfn)) )
-    {
-        predicted_gpfn = prediction & PGT_mfn_mask;
-        if ( (predicted_smfn = __shadow_status(d, predicted_gpfn, PGT_l1_shadow)) &&
-             (fixups = remove_all_write_access_in_ptpage(d, predicted_gpfn, predicted_smfn, readonly_gpfn, readonly_gmfn, write_refs, prediction)) )
-        {
-            found += fixups;
-            if ( found == write_refs )
-            {
-                perfc_incrc(remove_write_predicted);
-                return 1;
-            }
-        }
-        else
-        {
-            perfc_incrc(remove_write_bad_prediction);
-            decrease_writable_pte_prediction(d, readonly_gpfn, prediction);
-        }
+    if ( shadow_mode_external(d) ) {
+        if (write_refs-- == 0) 
+            return 0;
+
+         // Use the back pointer to locate the shadow page that can contain
+         // the PTE of interest
+         if ( (predicted_smfn = frame_table[readonly_gmfn].tlbflush_timestamp) ) {
+             found += remove_all_write_access_in_ptpage(
+                 d, predicted_smfn, predicted_smfn, readonly_gpfn, readonly_gmfn, write_refs, 0);
+             if ( found == write_refs )
+                 return 0;
+         }
     }
 
     // Search all the shadow L1 page tables...
@@ -1276,7 +1259,7 @@ static int remove_all_write_access(
             {
                 found += remove_all_write_access_in_ptpage(d, a->gpfn_and_flags & PGT_mfn_mask, a->smfn, readonly_gpfn, readonly_gmfn, write_refs - found, a->gpfn_and_flags & PGT_mfn_mask);
                 if ( found == write_refs )
-                    return 1;
+                    return 0;
             }
 
             a = a->next;
@@ -1376,7 +1359,7 @@ static int resync_all(struct domain *d, u32 stype)
                      guest_l1e_has_changed(guest1[i], snapshot1[i], PAGE_FLAG_MASK) )
                 {
                     need_flush |= validate_pte_change(d, guest1[i], &shadow1[i]);
-
+                    set_guest_back_ptr(d, shadow1[i], smfn, i);
                     // can't update snapshots of linear page tables -- they
                     // are used multiple times...
                     //
@@ -1604,6 +1587,8 @@ static void sync_all(struct domain *d)
              !shadow_get_page_from_l1e(npte, d) )
             BUG();
         *ppte = npte;
+        set_guest_back_ptr(d, npte, (entry->writable_pl1e) >> PAGE_SHIFT, 
+                           (entry->writable_pl1e & ~PAGE_MASK)/sizeof(l1_pgentry_t));
         shadow_put_page_from_l1e(opte, d);
 
         unmap_domain_page(ppte);
index dbdbd52a8ae5c44089614856dd2f37968c4c124e..0b884481811b5ce33994c9ef8d58f8c454eb6c5a 100644 (file)
@@ -1032,7 +1032,12 @@ int __shadow_mode_enable(struct domain *d, unsigned int mode)
             if ( !get_page_type(page, PGT_writable_page) )
                 BUG();
             put_page_type(page);
-
+            /*
+             * We use tlbflush_timestamp as back pointer to smfn, and need to
+             * clean up it.
+             */
+            if ( shadow_mode_external(d) )
+                page->tlbflush_timestamp = 0;
             list_ent = page->list.next;
         }
     }
@@ -1390,18 +1395,6 @@ int shadow_mode_control(struct domain *d, dom0_shadow_control_t *sc)
     return rc;
 }
 
-/*
- * XXX KAF: Why is this VMX specific?
- */
-void vmx_shadow_clear_state(struct domain *d)
-{
-    SH_VVLOG("%s:", __func__);
-    shadow_lock(d);
-    free_shadow_pages(d);
-    shadow_unlock(d);
-    update_pagetables(d->vcpu[0]);
-}
-
 unsigned long
 gpfn_to_mfn_foreign(struct domain *d, unsigned long gpfn)
 {
@@ -1462,14 +1455,10 @@ shadow_hl2_table(struct domain *d, unsigned long gpfn, unsigned long gmfn,
 
     hl2 = map_domain_page(hl2mfn);
 
-#ifdef __i386__
     if ( shadow_mode_external(d) )
         limit = L2_PAGETABLE_ENTRIES;
     else
         limit = DOMAIN_ENTRIES_PER_L2_PAGETABLE;
-#else
-    limit = 0; /* XXX x86/64 XXX */
-#endif
 
     memset(hl2, 0, limit * sizeof(l1_pgentry_t));
 
@@ -1665,6 +1654,7 @@ void shadow_map_l1_into_current_l2(unsigned long va)
                 min = i;
             if ( likely(i > max) )
                 max = i;
+            set_guest_back_ptr(d, sl1e, sl1mfn, i);
         }
 
         frame_table[sl1mfn].tlbflush_timestamp =
@@ -2072,6 +2062,24 @@ free_writable_pte_predictions(struct domain *d)
     }
 }
 
+static int fix_entry(
+    struct domain *d, 
+    l1_pgentry_t *pt, u32 *found, int is_l1_shadow, u32 max_refs_to_find)
+{
+    l1_pgentry_t old = *pt;
+    l1_pgentry_t new = old;
+
+    l1e_remove_flags(new,_PAGE_RW);
+    if ( is_l1_shadow && !shadow_get_page_from_l1e(new, d) )
+        BUG();
+    (*found)++;
+    *pt = new;
+    if ( is_l1_shadow )
+        shadow_put_page_from_l1e(old, d);
+
+    return (*found == max_refs_to_find);
+}
+
 static u32 remove_all_write_access_in_ptpage(
     struct domain *d, unsigned long pt_pfn, unsigned long pt_mfn,
     unsigned long readonly_gpfn, unsigned long readonly_gmfn,
@@ -2088,49 +2096,28 @@ static u32 remove_all_write_access_in_ptpage(
 
     match = l1e_from_pfn(readonly_gmfn, flags);
 
-    // returns true if all refs have been found and fixed.
-    //
-    int fix_entry(int i)
-    {
-        l1_pgentry_t old = pt[i];
-        l1_pgentry_t new = old;
-
-        l1e_remove_flags(new,_PAGE_RW);
-        if ( is_l1_shadow && !shadow_get_page_from_l1e(new, d) )
-            BUG();
-        found++;
-        pt[i] = new;
-        if ( is_l1_shadow )
-            shadow_put_page_from_l1e(old, d);
-
-#if 0
-        printk("removed write access to pfn=%lx mfn=%lx in smfn=%lx entry %x "
-               "is_l1_shadow=%d\n",
-               readonly_gpfn, readonly_gmfn, pt_mfn, i, is_l1_shadow);
-#endif
+    if ( shadow_mode_external(d) ) {
+        i = (frame_table[readonly_gmfn].u.inuse.type_info & PGT_va_mask) 
+            >> PGT_va_shift;
 
-        return (found == max_refs_to_find);
+        if ( (i >= 0 && i <= L1_PAGETABLE_ENTRIES) &&
+             !l1e_has_changed(pt[i], match, flags) && 
+             fix_entry(d, &pt[i], &found, is_l1_shadow, max_refs_to_find) &&
+             !prediction )
+            goto out;
     }
 
-    i = readonly_gpfn & (L1_PAGETABLE_ENTRIES - 1);
-    if ( !l1e_has_changed(pt[i], match, flags) && fix_entry(i) )
-    {
-        perfc_incrc(remove_write_fast_exit);
-        increase_writable_pte_prediction(d, readonly_gpfn, prediction);
-        unmap_domain_page(pt);
-        return found;
-    }
     for (i = 0; i < L1_PAGETABLE_ENTRIES; i++)
     {
-        if ( unlikely(!l1e_has_changed(pt[i], match, flags)) && fix_entry(i) )
+        if ( unlikely(!l1e_has_changed(pt[i], match, flags)) && 
+             fix_entry(d, &pt[i], &found, is_l1_shadow, max_refs_to_find) )
             break;
     }
 
+out:
     unmap_domain_page(pt);
 
     return found;
-#undef MATCH_ENTRY
 }
 
 int shadow_remove_all_write_access(
@@ -2138,8 +2125,8 @@ int shadow_remove_all_write_access(
 {
     int i;
     struct shadow_status *a;
-    u32 found = 0, fixups, write_refs;
-    unsigned long prediction, predicted_gpfn, predicted_smfn;
+    u32 found = 0, write_refs;
+    unsigned long predicted_smfn;
 
     ASSERT(shadow_lock_is_acquired(d));
     ASSERT(VALID_MFN(readonly_gmfn));
@@ -2169,27 +2156,19 @@ int shadow_remove_all_write_access(
         perfc_incrc(remove_write_no_work);
         return 1;
     }
+    
+    if ( shadow_mode_external(d) ) {
+        if (write_refs-- == 0) 
+            return 0;
 
-    // Before searching all the L1 page tables, check the typical culprit first
-    //
-    if ( (prediction = predict_writable_pte_page(d, readonly_gpfn)) )
-    {
-        predicted_gpfn = prediction & PGT_mfn_mask;
-        if ( (predicted_smfn = __shadow_status(d, predicted_gpfn, PGT_l1_shadow)) &&
-             (fixups = remove_all_write_access_in_ptpage(d, predicted_gpfn, predicted_smfn, readonly_gpfn, readonly_gmfn, write_refs, prediction)) )
-        {
-            found += fixups;
-            if ( found == write_refs )
-            {
-                perfc_incrc(remove_write_predicted);
-                return 1;
-            }
-        }
-        else
-        {
-            perfc_incrc(remove_write_bad_prediction);
-            decrease_writable_pte_prediction(d, readonly_gpfn, prediction);
-        }
+         // Use the back pointer to locate the shadow page that can contain
+         // the PTE of interest
+         if ( (predicted_smfn = frame_table[readonly_gmfn].tlbflush_timestamp) ) {
+             found += remove_all_write_access_in_ptpage(
+                 d, predicted_smfn, predicted_smfn, readonly_gpfn, readonly_gmfn, write_refs, 0);
+             if ( found == write_refs )
+                 return 0;
+         }
     }
 
     // Search all the shadow L1 page tables...
@@ -2203,7 +2182,7 @@ int shadow_remove_all_write_access(
             {
                 found += remove_all_write_access_in_ptpage(d, a->gpfn_and_flags & PGT_mfn_mask, a->smfn, readonly_gpfn, readonly_gmfn, write_refs - found, a->gpfn_and_flags & PGT_mfn_mask);
                 if ( found == write_refs )
-                    return 1;
+                    return 0;
             }
 
             a = a->next;
@@ -2376,12 +2355,12 @@ static int resync_all(struct domain *d, u32 stype)
                      l1e_has_changed(guest1[i], snapshot1[i], PAGE_FLAG_MASK) )
                 {
                     need_flush |= validate_pte_change(d, guest1[i], &shadow1[i]);
+                    set_guest_back_ptr(d, shadow1[i], smfn, i);
 
                     // can't update snapshots of linear page tables -- they
                     // are used multiple times...
                     //
                     // snapshot[i] = new_pte;
-
                     changed++;
                 }
             }
@@ -2530,6 +2509,8 @@ void __shadow_sync_all(struct domain *d)
              !shadow_get_page_from_l1e(npte, d) )
             BUG();
         *ppte = npte;
+        set_guest_back_ptr(d, npte, (entry->writable_pl1e) >> PAGE_SHIFT, 
+                           (entry->writable_pl1e & ~PAGE_MASK)/sizeof(l1_pgentry_t));
         shadow_put_page_from_l1e(opte, d);
 
         unmap_domain_page(ppte);
index 292ef81e7df18ef99de66581b77b541b7bf82471..36266128df2958db2849a7528b775842d70d21c5 100644 (file)
@@ -1095,7 +1095,12 @@ int __shadow_mode_enable(struct domain *d, unsigned int mode)
             if ( !get_page_type(page, PGT_writable_page) )
                 BUG();
             put_page_type(page);
-
+            /*
+             * We use tlbflush_timestamp as back pointer to smfn, and need to
+             * clean up it.
+             */
+            if ( shadow_mode_external(d) )
+                page->tlbflush_timestamp = 0;
             list_ent = page->list.next;
         }
     }
index 0e48038889d201b80b3a3650ca1effe944ad7b9a..3e2f3b9d6a64380699a59d465a8a68a973061f5f 100644 (file)
@@ -718,6 +718,23 @@ shadow_unpin(unsigned long smfn)
     put_shadow_ref(smfn);
 }
 
+/*
+ * SMP issue. The following code assumes the shadow lock is held. Re-visit
+ * when working on finer-gained locks for shadow.
+ */
+static inline void set_guest_back_ptr(
+    struct domain *d, l1_pgentry_t spte, unsigned long smfn, unsigned int index)
+{
+    if ( shadow_mode_external(d) ) {
+        unsigned long gmfn;
+
+        ASSERT(shadow_lock_is_acquired(d));
+        gmfn = l1e_get_pfn(spte);
+        frame_table[gmfn].tlbflush_timestamp = smfn;
+        frame_table[gmfn].u.inuse.type_info &= ~PGT_va_mask;
+        frame_table[gmfn].u.inuse.type_info |= (unsigned long) index << PGT_va_shift;
+    }
+}
 
 /************************************************************************/
 #if CONFIG_PAGING_LEVELS <= 2
@@ -1611,10 +1628,11 @@ shadow_set_l1e(unsigned long va, l1_pgentry_t new_spte, int create_l1_shadow)
             if ( l1e_get_flags(old_spte) & _PAGE_PRESENT )
                 shadow_put_page_from_l1e(old_spte, d);
         }
+
     }
 
+    set_guest_back_ptr(d, new_spte, l2e_get_pfn(sl2e), l1_table_offset(va));
     shadow_linear_pg_table[l1_linear_offset(va)] = new_spte;
-
     shadow_update_min_max(l2e_get_pfn(sl2e), l1_table_offset(va));
 }
 #endif